Lab 4A: Clustering (Easy)
1. Explore the Data
- Load Iris data
# Load the iris data set that ships with R into the workspace.
data("iris")
- Load color brewer library
library(RColorBrewer)
- Create a color palette
# Three colors from the ColorBrewer "Set2" scheme, indexed later by species.
palette <- brewer.pal(n = 3, name = "Set2")
- Create a scatterplot matrix colored by species
# Passing a data frame to plot() draws every pairwise scatterplot of its
# columns; each point takes the palette color of its species code.
plot(
  iris[, 1:4],
  pch = 19,
  col = palette[as.integer(iris$Species)]
)
- View scatterplot of petal length vs width
# Petal length on the x-axis against petal width on the y-axis, drawn with
# filled circles colored by species.
plot(
  iris$Petal.Length,
  iris$Petal.Width,
  pch = 19,
  col = palette[as.integer(iris$Species)]
)
2. Create Clusters with K-Means
- Set seed to make randomness reproducible
# Fix the random number generator state so later random draws repeat exactly.
set.seed(seed = 42)
- Create K-means clusters
# Partition the four measurement columns into 3 groups; nstart = 10 tries
# ten random sets of starting centers and keeps the best solution.
clusters <- kmeans(
  iris[1:4],
  centers = 3,
  nstart = 10
)
- Plot each cluster as a shape
# Color still encodes the species, while the plotting symbol now encodes
# the cluster number assigned to each flower.
plot(
  iris$Petal.Length,
  iris$Petal.Width,
  pch = clusters$cluster,
  col = palette[as.integer(iris$Species)]
)
- Plot centroid of clusters
# Redraw the petal scatterplot: color encodes species, symbol encodes the
# assigned cluster.
plot(
  iris$Petal.Length,
  iris$Petal.Width,
  pch = clusters$cluster,
  col = palette[as.integer(iris$Species)]
)
# Overlay the cluster centers as thick blue crosses. A two-column matrix is
# read by points() as (x, y) pairs, so the petal columns are passed together.
points(
  clusters$centers[, c("Petal.Length", "Petal.Width")],
  col = "blue",
  pch = 4,
  lwd = 4
)
3. Create Hierarchical Clusters
- Create hierarchical clusters
# Build the cluster tree from pairwise distances between the four
# measurement columns, using the defaults of dist() and hclust().
hclusters <- hclust(d = dist(iris[, 1:4]))
- Plot dendrogram of clusters
# Draw the dendrogram, labelling each leaf with its species code (1-3)
# rather than its row number.
plot(
  hclusters,
  labels = as.integer(iris$Species)
)
- Cut tree into three clusters
# Convert the tree to a dendrogram and cut it at height 4; the result holds
# the part above the cut ($upper) and the branches below it ($lower).
cuts <- cut(as.dendrogram(hclusters), h = 4)
- Get clusters as vector
# One cluster number (1-3) per observation, in the row order of the data.
cuts2 <- cutree(hclusters, k = 3)
- Plot clusters and color by species
# Petal scatterplot again: color encodes species, symbol encodes the
# cluster number stored in cuts2.
plot(
  iris$Petal.Length,
  iris$Petal.Width,
  pch = cuts2,
  col = palette[as.integer(iris$Species)]
)